Goal: Make a PCA plot using Irene’s SNPs to look for relatedness among plants. Also figure out the loadings (which genes contribute most).
First, need to call SNPs using all BAM files at once:
working in directory 2019/IreneSnps on whitney
first, sort bams
# Coordinate-sort every duplicate-removed LT BAM found in SNPanalysis/.
# Each output is written to the current directory as <input name>_sort.bam.
for f in SNPanalysis/*LT*rmdup.bam
do
    # If the glob matches nothing the shell passes the literal pattern; skip it.
    [ -e "$f" ] || continue
    newname="$(basename "$f" .bam)_sort.bam"
    samtools sort -o "$newname" --reference SNPanalysis/Pinsaporeference1 "$f"
done
next, assign read groups to bams
# Build the bamaddrg argument list: one "-b <bam> -r <read group> -s <sample>"
# triple per sorted BAM, with the read group and sample both set to the file
# name minus its _rmdup_sort.bam suffix.
input=""
# Iterate over the glob directly rather than over the output of `ls`.
for f in *sort.bam
do
    # If the glob matches nothing the shell passes the literal pattern; skip it.
    [ -e "$f" ] || continue
    rg="$(basename "$f" _rmdup_sort.bam)"
    input="$input -b $f -r $rg -s $rg"
done
# $input is deliberately left unquoted below so it splits into separate arguments.
echo $input
bamaddrg $input > LT_rmdup_sort_combined.bam
samtools index LT_rmdup_sort_combined.bam
# Single-process call on the combined BAM, started in the background;
# indels, MNPs and complex variants are switched off.
freebayes -f SNPanalysis/Pinsaporeference1 --no-indels --no-mnps --no-complex LT_rmdup_sort_combined.bam > LT.vcf &
try parallel
# Parallel SNP calling with the local (edited) freebayes-parallel wrapper:
# generate a regions file from the reference index (second argument 100000),
# then pass the regions file and 8 to the wrapper, followed by the same
# freebayes options as the single-process call.
# NOTE(review): the reference is named here without the SNPanalysis/ prefix
# used in the earlier steps — confirm it resolves from the working directory.
ulimit -n 4000   # raise the open-file limit for this shell
ref=Pinsaporeference1
/usr/local/stow/freebayes/scripts/fasta_generate_regions.py "$ref.fai" 100000 > regions
./freebayes-parallel regions 8 \
    -f "$ref" --no-indels --no-mnps --no-complex \
    LT_rmdup_sort_combined.bam > LT.vcf
(note: I edited the freebayes-parallel script so that it would work…)
freebayes-parallel takes about 12 hours.
# Copy the gzipped VCF from whitney into ../input/.
# NOTE(review): the steps above write LT.vcf; the compression step that
# produces LT.vcf.gz is not recorded in these notes — TODO confirm how it was made.
scp whitney.plb.ucdavis.edu:2019/IreneSnps/LT.vcf.gz ../input/
library(tidyverse)
library(ggrepel)
get the vcf header
# Grab the column-header line of the gzipped VCF via zgrep.
# The pattern is anchored to the start of the line and spells out "#CHROM",
# so a line that merely contains "#C" somewhere cannot be returned in its
# place (only the first match is used further down).
vcf.header <- system("zgrep '^#CHROM' ../input/LT.vcf.gz", intern = TRUE)
vcf.header
[1] "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t38LTR\t42LTR\t42LTRR\t43LTR\t43LTRR\t49LTWR\t49LTWRR\t95LTWR\t95LTWRR\t99LTWR"
# Turn the raw header line into a character vector of column names:
# drop the leading pound sign, split on tabs, and keep the first (only) element.
header_no_pound <- str_replace(vcf.header, "#", "")
vcf.header <- str_split(header_no_pound, pattern = "\t")[[1]]
vcf.header
[1] "CHROM" "POS" "ID" "REF" "ALT" "QUAL"
[7] "FILTER" "INFO" "FORMAT" "38LTR" "42LTR" "42LTRR"
[13] "43LTR" "43LTRR" "49LTWR" "49LTWRR" "95LTWR" "95LTWRR"
[19] "99LTWR"
get the data
# Read the variant records from the gzipped VCF into a tibble, using the
# column names taken from the header line.
# - Column types are declared instead of guessed: POS and QUAL are numeric and
#   everything else is read as text, so the result does not depend on readr's
#   type guessing and no column-specification message is printed.
# - The progress bar is turned off so it does not flood the rendered output.
snps <- read_tsv(
  "../input/LT.vcf.gz",
  na = c("", "NA", "."),
  comment = "#",
  col_names = vcf.header,
  col_types = cols(
    .default = col_character(),
    POS = col_double(),
    QUAL = col_double()
  ),
  progress = FALSE
) %>%
  select(-ID, -FILTER) # these are empty columns
Parsed with column specification:
cols(
  CHROM = col_character(),
  POS = col_double(),
  ID = col_logical(),
  REF = col_character(),
  ALT = col_character(),
  QUAL = col_double(),
  FILTER = col_logical(),
  INFO = col_character(),
  FORMAT = col_character(),
  `38LTR` = col_character(),
  `42LTR` = col_character(),
  `42LTRR` = col_character(),
  `43LTR` = col_character(),
  `43LTRR` = col_character(),
  `49LTWR` = col_character(),
  `49LTWRR` = col_character(),
  `95LTWR` = col_character(),
  `95LTWRR` = col_character(),
  `99LTWR` = col_character()
)
|= | 2% 11 MB
|= | 2% 11 MB
|= | 3% 12 MB
|= | 3% 12 MB
|= | 3% 12 MB
|= | 3% 13 MB
|= | 3% 13 MB
|= | 3% 13 MB
|= | 3% 14 MB
|== | 3% 14 MB
|== | 3% 15 MB
|== | 3% 15 MB
|== | 4% 15 MB
|== | 4% 16 MB
|== | 4% 16 MB
|== | 4% 17 MB
|== | 4% 17 MB
|== | 4% 17 MB
|== | 4% 18 MB
|== | 4% 18 MB
|== | 4% 18 MB
|== | 4% 19 MB
|== | 5% 19 MB
|== | 5% 20 MB
|== | 5% 20 MB
|== | 5% 21 MB
|== | 5% 21 MB
|== | 5% 21 MB
|=== | 5% 22 MB
|=== | 5% 22 MB
|=== | 5% 22 MB
|=== | 5% 23 MB
|=== | 6% 23 MB
|=== | 6% 24 MB
|=== | 6% 24 MB
|=== | 6% 24 MB
|=== | 6% 25 MB
|=== | 6% 25 MB
|=== | 6% 26 MB
|=== | 6% 26 MB
|=== | 6% 26 MB
|=== | 6% 27 MB
|=== | 6% 27 MB
|=== | 7% 27 MB
|=== | 7% 28 MB
|=== | 7% 28 MB
|=== | 7% 29 MB
|==== | 7% 29 MB
|==== | 7% 29 MB
|==== | 7% 30 MB
|==== | 7% 30 MB
|==== | 7% 31 MB
|==== | 7% 31 MB
|==== | 8% 31 MB
|==== | 8% 32 MB
|==== | 8% 32 MB
|==== | 8% 33 MB
|==== | 8% 33 MB
|==== | 8% 33 MB
|==== | 8% 34 MB
|==== | 8% 34 MB
|==== | 8% 34 MB
|==== | 8% 35 MB
|==== | 9% 35 MB
|==== | 9% 36 MB
|==== | 9% 36 MB
|===== | 9% 36 MB
|===== | 9% 37 MB
|===== | 9% 37 MB
|===== | 9% 38 MB
|===== | 9% 38 MB
|===== | 9% 38 MB
|===== | 9% 39 MB
|===== | 10% 39 MB
|===== | 10% 39 MB
|===== | 10% 40 MB
|===== | 10% 40 MB
|===== | 10% 41 MB
|===== | 10% 41 MB
|===== | 10% 41 MB
|===== | 10% 42 MB
|===== | 10% 42 MB
|===== | 10% 43 MB
|===== | 10% 43 MB
|===== | 11% 43 MB
|====== | 11% 44 MB
|====== | 11% 44 MB
|====== | 11% 45 MB
|====== | 11% 45 MB
|====== | 11% 45 MB
|====== | 11% 46 MB
|====== | 11% 46 MB
|====== | 11% 46 MB
|====== | 11% 47 MB
|====== | 12% 47 MB
|====== | 12% 48 MB
|====== | 12% 48 MB
|====== | 12% 48 MB
|====== | 12% 49 MB
|====== | 12% 49 MB
|====== | 12% 50 MB
|====== | 12% 50 MB
|====== | 12% 50 MB
|====== | 12% 51 MB
|======= | 13% 51 MB
|======= | 13% 51 MB
|======= | 13% 52 MB
|======= | 13% 52 MB
|======= | 13% 53 MB
|======= | 13% 53 MB
|======= | 13% 53 MB
|======= | 13% 54 MB
|======= | 13% 54 MB
|======= | 13% 54 MB
|======= | 14% 55 MB
|======= | 14% 55 MB
|======= | 14% 56 MB
|======= | 14% 56 MB
|======= | 14% 56 MB
|======= | 14% 57 MB
|======= | 14% 57 MB
|======= | 14% 58 MB
|======= | 14% 58 MB
|======== | 14% 58 MB
|======== | 14% 59 MB
|======== | 15% 59 MB
|======== | 15% 60 MB
|======== | 15% 60 MB
|======== | 15% 60 MB
|======== | 15% 61 MB
|======== | 15% 61 MB
|======== | 15% 61 MB
|======== | 15% 62 MB
|======== | 15% 62 MB
|======== | 15% 63 MB
|======== | 16% 63 MB
|======== | 16% 63 MB
|======== | 16% 64 MB
|======== | 16% 64 MB
|======== | 16% 65 MB
|======== | 16% 65 MB
|======== | 16% 65 MB
|========= | 16% 66 MB
|========= | 16% 66 MB
|========= | 16% 66 MB
|========= | 17% 67 MB
|========= | 17% 67 MB
|========= | 17% 68 MB
|========= | 17% 68 MB
|========= | 17% 68 MB
|========= | 17% 69 MB
|========= | 17% 69 MB
|========= | 17% 70 MB
|========= | 17% 70 MB
|========= | 17% 70 MB
|========= | 18% 71 MB
|========= | 18% 71 MB
|========= | 18% 71 MB
|========= | 18% 72 MB
|========= | 18% 72 MB
|========= | 18% 73 MB
|========== | 18% 73 MB
|========== | 18% 73 MB
|========== | 18% 74 MB
|========== | 18% 74 MB
|========== | 18% 75 MB
|========== | 19% 75 MB
|========== | 19% 75 MB
|========== | 19% 76 MB
|========== | 19% 76 MB
|========== | 19% 77 MB
|========== | 19% 77 MB
|========== | 19% 77 MB
|========== | 19% 78 MB
|========== | 19% 78 MB
|========== | 19% 78 MB
|========== | 20% 79 MB
|========== | 20% 79 MB
|========== | 20% 80 MB
|========== | 20% 80 MB
|=========== | 20% 80 MB
|=========== | 20% 81 MB
|=========== | 20% 81 MB
|=========== | 20% 81 MB
|=========== | 20% 82 MB
|=========== | 20% 82 MB
|=========== | 21% 83 MB
|=========== | 21% 83 MB
|=========== | 21% 83 MB
|=========== | 21% 84 MB
|=========== | 21% 84 MB
|=========== | 21% 84 MB
|=========== | 21% 85 MB
|=========== | 21% 85 MB
|=========== | 21% 86 MB
|=========== | 21% 86 MB
|=========== | 21% 86 MB
|=========== | 22% 87 MB
|=========== | 22% 87 MB
|============ | 22% 88 MB
|============ | 22% 88 MB
|============ | 22% 88 MB
|============ | 22% 89 MB
|============ | 22% 89 MB
|============ | 22% 89 MB
|============ | 22% 90 MB
|============ | 22% 90 MB
|============ | 23% 91 MB
|============ | 23% 91 MB
|============ | 23% 91 MB
|============ | 23% 92 MB
|============ | 23% 92 MB
|============ | 23% 92 MB
|============ | 23% 93 MB
|============ | 23% 93 MB
|============ | 23% 94 MB
|============ | 23% 94 MB
|============ | 24% 94 MB
|============= | 24% 95 MB
|============= | 24% 95 MB
|============= | 24% 96 MB
|============= | 24% 96 MB
|============= | 24% 96 MB
|============= | 24% 97 MB
|============= | 24% 97 MB
|============= | 24% 97 MB
|============= | 24% 98 MB
|============= | 24% 98 MB
|============= | 25% 99 MB
|============= | 25% 99 MB
|============= | 25% 99 MB
|============= | 25% 100 MB
|============= | 25% 100 MB
|============= | 25% 101 MB
|============= | 25% 101 MB
|============= | 25% 101 MB
|============= | 25% 102 MB
|============== | 25% 102 MB
|============== | 26% 102 MB
|============== | 26% 103 MB
|============== | 26% 103 MB
|============== | 26% 104 MB
|============== | 26% 104 MB
|============== | 26% 104 MB
|============== | 26% 105 MB
|============== | 26% 105 MB
|============== | 26% 106 MB
|============== | 26% 106 MB
|============== | 27% 106 MB
|============== | 27% 107 MB
|============== | 27% 107 MB
|============== | 27% 107 MB
|============== | 27% 108 MB
|============== | 27% 108 MB
|============== | 27% 109 MB
|============== | 27% 109 MB
|=============== | 27% 109 MB
|=============== | 27% 110 MB
|=============== | 28% 110 MB
|=============== | 28% 111 MB
|=============== | 28% 111 MB
|=============== | 28% 111 MB
|=============== | 28% 112 MB
|=============== | 28% 112 MB
|=============== | 28% 113 MB
|=============== | 28% 113 MB
|=============== | 28% 113 MB
|=============== | 28% 114 MB
|=============== | 28% 114 MB
|=============== | 29% 114 MB
|=============== | 29% 115 MB
|=============== | 29% 115 MB
|=============== | 29% 116 MB
|=============== | 29% 116 MB
|=============== | 29% 116 MB
|================ | 29% 117 MB
|================ | 29% 117 MB
|================ | 29% 118 MB
|================ | 29% 118 MB
|================ | 30% 118 MB
|================ | 30% 119 MB
|================ | 30% 119 MB
|================ | 30% 120 MB
|================ | 30% 120 MB
|================ | 30% 120 MB
|================ | 30% 121 MB
|================ | 30% 121 MB
|================ | 30% 121 MB
|================ | 30% 122 MB
|================ | 31% 122 MB
|================ | 31% 123 MB
|================ | 31% 123 MB
|================ | 31% 123 MB
|================ | 31% 124 MB
|================= | 31% 124 MB
|================= | 31% 125 MB
|================= | 31% 125 MB
|================= | 31% 125 MB
|================= | 31% 126 MB
|================= | 32% 126 MB
|================= | 32% 126 MB
|================= | 32% 127 MB
|================= | 32% 127 MB
|================= | 32% 128 MB
|================= | 32% 128 MB
|================= | 32% 128 MB
|================= | 32% 129 MB
|================= | 32% 129 MB
|================= | 32% 130 MB
|================= | 33% 130 MB
|================= | 33% 131 MB
|================= | 33% 131 MB
|================== | 33% 131 MB
|================== | 33% 132 MB
|================== | 33% 132 MB
|================== | 33% 132 MB
|================== | 33% 133 MB
|================== | 33% 133 MB
|================== | 33% 134 MB
|================== | 34% 134 MB
|================== | 34% 134 MB
|================== | 34% 135 MB
|================== | 34% 135 MB
|================== | 34% 136 MB
|================== | 34% 136 MB
|================== | 34% 136 MB
|================== | 34% 137 MB
|================== | 34% 137 MB
|================== | 34% 138 MB
|================== | 35% 138 MB
|================== | 35% 138 MB
|=================== | 35% 139 MB
|=================== | 35% 139 MB
|=================== | 35% 139 MB
|=================== | 35% 140 MB
|=================== | 35% 140 MB
|=================== | 35% 141 MB
|=================== | 35% 141 MB
|=================== | 35% 141 MB
|=================== | 36% 142 MB
|=================== | 36% 142 MB
|=================== | 36% 143 MB
|=================== | 36% 143 MB
|=================== | 36% 143 MB
|=================== | 36% 144 MB
|=================== | 36% 144 MB
|=================== | 36% 145 MB
|=================== | 36% 145 MB
|=================== | 36% 145 MB
|=================== | 37% 146 MB
|==================== | 37% 146 MB
|==================== | 37% 147 MB
|==================== | 37% 147 MB
|==================== | 37% 147 MB
|==================== | 37% 148 MB
|==================== | 37% 148 MB
|==================== | 37% 148 MB
|==================== | 37% 149 MB
|==================== | 37% 149 MB
|==================== | 37% 150 MB
|==================== | 38% 150 MB
|==================== | 38% 150 MB
|==================== | 38% 151 MB
|==================== | 38% 151 MB
|==================== | 38% 152 MB
|==================== | 38% 152 MB
|==================== | 38% 152 MB
|==================== | 38% 153 MB
|==================== | 38% 153 MB
|===================== | 38% 154 MB
|===================== | 39% 154 MB
|===================== | 39% 154 MB
|===================== | 39% 155 MB
|===================== | 39% 155 MB
|===================== | 39% 156 MB
|===================== | 39% 156 MB
|===================== | 39% 156 MB
|===================== | 39% 157 MB
|===================== | 39% 157 MB
|===================== | 39% 158 MB
|===================== | 40% 158 MB
|===================== | 40% 158 MB
|===================== | 40% 159 MB
|===================== | 40% 159 MB
|===================== | 40% 159 MB
|===================== | 40% 160 MB
|===================== | 40% 160 MB
|====================== | 40% 161 MB
|====================== | 40% 161 MB
|====================== | 40% 161 MB
|====================== | 41% 162 MB
|====================== | 41% 162 MB
|====================== | 41% 163 MB
|====================== | 41% 163 MB
|====================== | 41% 163 MB
|====================== | 41% 164 MB
|====================== | 41% 164 MB
|====================== | 41% 165 MB
|====================== | 41% 165 MB
|====================== | 42% 166 MB
|====================== | 42% 166 MB
|====================== | 42% 166 MB
|====================== | 42% 167 MB
|====================== | 42% 167 MB
|====================== | 42% 168 MB
|======================= | 42% 168 MB
|======================= | 42% 169 MB
|======================= | 42% 169 MB
|======================= | 42% 169 MB
|======================= | 43% 170 MB
|======================= | 43% 170 MB
|======================= | 43% 170 MB
|======================= | 43% 171 MB
|======================= | 43% 171 MB
|======================= | 43% 172 MB
|======================= | 43% 172 MB
|======================= | 43% 173 MB
|======================= | 43% 173 MB
|======================= | 43% 173 MB
|======================= | 44% 174 MB
|======================= | 44% 174 MB
|======================= | 44% 174 MB
|======================= | 44% 175 MB
|======================== | 44% 175 MB
|======================== | 44% 176 MB
|======================== | 44% 176 MB
|======================== | 44% 176 MB
|======================== | 44% 177 MB
|======================== | 44% 177 MB
|======================== | 45% 178 MB
|======================== | 45% 178 MB
|======================== | 45% 178 MB
|======================== | 45% 179 MB
|======================== | 45% 179 MB
|======================== | 45% 180 MB
|======================== | 45% 180 MB
|======================== | 45% 180 MB
|======================== | 45% 181 MB
|======================== | 45% 181 MB
|======================== | 46% 182 MB
|======================== | 46% 182 MB
|======================== | 46% 182 MB
|========================= | 46% 183 MB
|========================= | 46% 183 MB
|========================= | 46% 184 MB
|========================= | 46% 184 MB
|========================= | 46% 184 MB
|========================= | 46% 185 MB
|========================= | 46% 185 MB
|========================= | 47% 185 MB
|========================= | 47% 186 MB
|========================= | 47% 186 MB
|========================= | 47% 187 MB
|========================= | 47% 187 MB
|========================= | 47% 188 MB
|========================= | 47% 188 MB
|========================= | 47% 188 MB
|========================= | 47% 189 MB
|========================= | 47% 189 MB
|========================= | 48% 190 MB
|========================== | 48% 190 MB
|========================== | 48% 190 MB
|========================== | 48% 191 MB
|========================== | 48% 191 MB
|========================== | 48% 191 MB
|========================== | 48% 192 MB
|========================== | 48% 192 MB
|========================== | 48% 193 MB
|========================== | 48% 193 MB
|========================== | 49% 194 MB
|========================== | 49% 194 MB
|========================== | 49% 194 MB
|========================== | 49% 195 MB
|========================== | 49% 195 MB
|========================== | 49% 196 MB
|========================== | 49% 196 MB
|========================== | 49% 196 MB
|========================== | 49% 197 MB
|=========================== | 50% 197 MB
|=========================== | 50% 198 MB
|=========================== | 50% 198 MB
|=========================== | 50% 198 MB
|=========================== | 50% 199 MB
|=========================== | 50% 199 MB
|=========================== | 50% 200 MB
|=========================== | 50% 200 MB
|=========================== | 50% 200 MB
|=========================== | 50% 201 MB
|=========================== | 50% 201 MB
|=========================== | 51% 201 MB
|=========================== | 51% 202 MB
|=========================== | 51% 202 MB
|=========================== | 51% 203 MB
|=========================== | 51% 203 MB
|=========================== | 51% 203 MB
|=========================== | 51% 204 MB
|=========================== | 51% 204 MB
|============================ | 51% 205 MB
|============================ | 51% 205 MB
|============================ | 52% 205 MB
|============================ | 52% 206 MB
|============================ | 52% 206 MB
|============================ | 52% 207 MB
|============================ | 52% 207 MB
|============================ | 52% 207 MB
|============================ | 52% 208 MB
|============================ | 52% 208 MB
|============================ | 52% 209 MB
|============================ | 52% 209 MB
|============================ | 53% 209 MB
|============================ | 53% 210 MB
|============================ | 53% 210 MB
|============================ | 53% 211 MB
|============================ | 53% 211 MB
|============================ | 53% 211 MB
|============================ | 53% 212 MB
|============================= | 53% 212 MB
|============================= | 53% 213 MB
|============================= | 54% 213 MB
|============================= | 54% 213 MB
|============================= | 54% 214 MB
|============================= | 54% 214 MB
|============================= | 54% 214 MB
|============================= | 54% 215 MB
|============================= | 54% 215 MB
|============================= | 54% 216 MB
|============================= | 54% 216 MB
|============================= | 54% 216 MB
|============================= | 54% 217 MB
|============================= | 55% 217 MB
|============================= | 55% 218 MB
|============================= | 55% 218 MB
|============================= | 55% 218 MB
|============================= | 55% 219 MB
|============================== | 55% 219 MB
|============================== | 55% 220 MB
|============================== | 55% 220 MB
|============================== | 55% 220 MB
|============================== | 55% 221 MB
|============================== | 56% 221 MB
|============================== | 56% 221 MB
|============================== | 56% 222 MB
|============================== | 56% 222 MB
|============================== | 56% 223 MB
|============================== | 56% 223 MB
|============================== | 56% 223 MB
|============================== | 56% 224 MB
|============================== | 56% 224 MB
|============================== | 56% 225 MB
|============================== | 57% 225 MB
|============================== | 57% 225 MB
|============================== | 57% 226 MB
|============================== | 57% 226 MB
|=============================== | 57% 227 MB
|=============================== | 57% 227 MB
|=============================== | 57% 227 MB
|=============================== | 57% 228 MB
|=============================== | 57% 228 MB
|=============================== | 57% 229 MB
|=============================== | 58% 229 MB
|=============================== | 58% 229 MB
|=============================== | 58% 230 MB
|=============================== | 58% 230 MB
|=============================== | 58% 230 MB
|=============================== | 58% 231 MB
|=============================== | 58% 231 MB
|=============================== | 58% 232 MB
|=============================== | 58% 232 MB
|=============================== | 58% 232 MB
|=============================== | 59% 233 MB
|=============================== | 59% 233 MB
|=============================== | 59% 234 MB
|================================ | 59% 234 MB
|================================ | 59% 234 MB
|================================ | 59% 235 MB
|================================ | 59% 235 MB
|================================ | 59% 236 MB
|================================ | 59% 236 MB
|================================ | 59% 236 MB
|================================ | 60% 237 MB
|================================ | 60% 237 MB
|================================ | 60% 237 MB
|================================ | 60% 238 MB
|================================ | 60% 238 MB
|================================ | 60% 239 MB
|================================ | 60% 239 MB
|================================ | 60% 239 MB
|================================ | 60% 240 MB
|================================ | 60% 240 MB
|================================ | 61% 241 MB
|================================ | 61% 241 MB
|================================= | 61% 241 MB
|================================= | 61% 242 MB
|================================= | 61% 242 MB
|================================= | 61% 243 MB
|================================= | 61% 243 MB
|================================= | 61% 243 MB
|================================= | 61% 244 MB
|================================= | 61% 244 MB
|================================= | 61% 245 MB
|================================= | 62% 245 MB
|================================= | 62% 245 MB
|================================= | 62% 246 MB
|================================= | 62% 246 MB
|================================= | 62% 246 MB
|================================= | 62% 247 MB
|================================= | 62% 247 MB
|================================= | 62% 248 MB
|================================= | 62% 248 MB
|================================== | 62% 248 MB
|================================== | 63% 249 MB
|================================== | 63% 249 MB
|================================== | 63% 250 MB
|================================== | 63% 250 MB
|================================== | 63% 250 MB
|================================== | 63% 251 MB
|================================== | 63% 251 MB
|================================== | 63% 252 MB
|================================== | 63% 252 MB
|================================== | 63% 252 MB
|================================== | 64% 253 MB
|================================== | 64% 253 MB
|================================== | 64% 253 MB
|================================== | 64% 254 MB
|================================== | 64% 254 MB
|================================== | 64% 255 MB
|================================== | 64% 255 MB
|================================== | 64% 255 MB
|=================================== | 64% 256 MB
|=================================== | 64% 256 MB
|=================================== | 65% 257 MB
|=================================== | 65% 257 MB
|=================================== | 65% 257 MB
|=================================== | 65% 258 MB
|=================================== | 65% 258 MB
|=================================== | 65% 259 MB
|=================================== | 65% 259 MB
|=================================== | 65% 259 MB
|=================================== | 65% 260 MB
|=================================== | 65% 260 MB
|=================================== | 66% 261 MB
|=================================== | 66% 261 MB
|=================================== | 66% 261 MB
|=================================== | 66% 262 MB
|=================================== | 66% 262 MB
|=================================== | 66% 263 MB
|=================================== | 66% 263 MB
|==================================== | 66% 263 MB
|==================================== | 66% 264 MB
|==================================== | 66% 264 MB
|==================================== | 67% 264 MB
|==================================== | 67% 265 MB
|==================================== | 67% 265 MB
|==================================== | 67% 266 MB
|==================================== | 67% 266 MB
|==================================== | 67% 266 MB
|==================================== | 67% 267 MB
|==================================== | 67% 267 MB
|==================================== | 67% 268 MB
|==================================== | 67% 268 MB
|==================================== | 68% 268 MB
|==================================== | 68% 269 MB
|==================================== | 68% 269 MB
|==================================== | 68% 270 MB
|==================================== | 68% 270 MB
|===================================== | 68% 270 MB
|===================================== | 68% 271 MB
|===================================== | 68% 271 MB
|===================================== | 68% 271 MB
|===================================== | 68% 272 MB
|===================================== | 69% 272 MB
|===================================== | 69% 273 MB
|===================================== | 69% 273 MB
|===================================== | 69% 274 MB
|===================================== | 69% 274 MB
|===================================== | 69% 274 MB
|===================================== | 69% 275 MB
|===================================== | 69% 275 MB
|===================================== | 69% 276 MB
|===================================== | 69% 276 MB
|===================================== | 70% 276 MB
|===================================== | 70% 277 MB
|===================================== | 70% 277 MB
|===================================== | 70% 277 MB
|====================================== | 70% 278 MB
|====================================== | 70% 278 MB
|====================================== | 70% 279 MB
|====================================== | 70% 279 MB
|====================================== | 70% 279 MB
|====================================== | 70% 280 MB
|====================================== | 71% 280 MB
|====================================== | 71% 281 MB
|====================================== | 71% 281 MB
|====================================== | 71% 281 MB
|====================================== | 71% 282 MB
|====================================== | 71% 282 MB
|====================================== | 71% 282 MB
|====================================== | 71% 283 MB
|====================================== | 71% 283 MB
|====================================== | 71% 284 MB
|====================================== | 71% 284 MB
|====================================== | 72% 284 MB
|====================================== | 72% 285 MB
|======================================= | 72% 285 MB
|======================================= | 72% 286 MB
|======================================= | 72% 286 MB
|======================================= | 72% 286 MB
|======================================= | 72% 287 MB
|======================================= | 72% 287 MB
|======================================= | 72% 287 MB
|======================================= | 72% 288 MB
|======================================= | 73% 288 MB
|======================================= | 73% 289 MB
|======================================= | 73% 289 MB
|======================================= | 73% 289 MB
|======================================= | 73% 290 MB
|======================================= | 73% 290 MB
|======================================= | 73% 291 MB
|======================================= | 73% 291 MB
|======================================= | 73% 291 MB
|======================================= | 73% 292 MB
|======================================= | 74% 292 MB
|======================================== | 74% 293 MB
|======================================== | 74% 293 MB
|======================================== | 74% 293 MB
|======================================== | 74% 294 MB
|======================================== | 74% 294 MB
|======================================== | 74% 295 MB
|======================================== | 74% 295 MB
|======================================== | 74% 295 MB
|======================================== | 74% 296 MB
|======================================== | 75% 296 MB
|======================================== | 75% 296 MB
|======================================== | 75% 297 MB
|======================================== | 75% 297 MB
|======================================== | 75% 298 MB
|======================================== | 75% 298 MB
|======================================== | 75% 298 MB
|======================================== | 75% 299 MB
|======================================== | 75% 299 MB
|======================================== | 75% 300 MB
|========================================= | 76% 300 MB
|========================================= | 76% 300 MB
|========================================= | 76% 301 MB
|========================================= | 76% 301 MB
|========================================= | 76% 301 MB
|========================================= | 76% 302 MB
|========================================= | 76% 302 MB
|========================================= | 76% 303 MB
|========================================= | 76% 303 MB
|========================================= | 76% 303 MB
|========================================= | 76% 304 MB
|========================================= | 77% 304 MB
|========================================= | 77% 305 MB
|========================================= | 77% 305 MB
|========================================= | 77% 305 MB
|========================================= | 77% 306 MB
|========================================= | 77% 306 MB
|========================================= | 77% 307 MB
|========================================== | 77% 307 MB
|========================================== | 77% 307 MB
|========================================== | 78% 308 MB
|========================================== | 78% 308 MB
|========================================== | 78% 309 MB
|========================================== | 78% 309 MB
|========================================== | 78% 309 MB
|========================================== | 78% 310 MB
|========================================== | 78% 310 MB
|========================================== | 78% 310 MB
|========================================== | 78% 311 MB
|========================================== | 78% 311 MB
|========================================== | 78% 312 MB
|========================================== | 79% 312 MB
|========================================== | 79% 312 MB
|========================================== | 79% 313 MB
|========================================== | 79% 313 MB
|========================================== | 79% 314 MB
|========================================== | 79% 314 MB
|=========================================== | 79% 314 MB
|=========================================== | 79% 315 MB
|=========================================== | 79% 315 MB
|=========================================== | 79% 316 MB
|=========================================== | 80% 316 MB
|=========================================== | 80% 316 MB
|=========================================== | 80% 317 MB
|=========================================== | 80% 317 MB
|=========================================== | 80% 317 MB
|=========================================== | 80% 318 MB
|=========================================== | 80% 318 MB
|=========================================== | 80% 319 MB
|=========================================== | 80% 319 MB
|=========================================== | 80% 319 MB
|=========================================== | 81% 320 MB
|=========================================== | 81% 320 MB
|=========================================== | 81% 321 MB
|=========================================== | 81% 321 MB
|=========================================== | 81% 321 MB
|============================================ | 81% 322 MB
|============================================ | 81% 322 MB
|============================================ | 81% 323 MB
|============================================ | 81% 323 MB
|============================================ | 81% 323 MB
|============================================ | 82% 324 MB
|============================================ | 82% 324 MB
|============================================ | 82% 325 MB
|============================================ | 82% 325 MB
|============================================ | 82% 325 MB
|============================================ | 82% 326 MB
|============================================ | 82% 326 MB
|============================================ | 82% 327 MB
|============================================ | 82% 327 MB
|============================================ | 82% 327 MB
|============================================ | 83% 328 MB
|============================================ | 83% 328 MB
|============================================ | 83% 329 MB
|============================================= | 83% 329 MB
|============================================= | 83% 329 MB
|============================================= | 83% 330 MB
|============================================= | 83% 330 MB
|============================================= | 83% 330 MB
|============================================= | 83% 331 MB
|============================================= | 83% 331 MB
|============================================= | 84% 332 MB
|============================================= | 84% 332 MB
|============================================= | 84% 332 MB
|============================================= | 84% 333 MB
|============================================= | 84% 333 MB
|============================================= | 84% 334 MB
|============================================= | 84% 334 MB
|============================================= | 84% 334 MB
|============================================= | 84% 335 MB
|============================================= | 84% 335 MB
|============================================= | 85% 336 MB
|============================================= | 85% 336 MB
|============================================== | 85% 336 MB
|============================================== | 85% 337 MB
|============================================== | 85% 337 MB
|============================================== | 85% 338 MB
|============================================== | 85% 338 MB
|============================================== | 85% 338 MB
|============================================== | 85% 339 MB
|============================================== | 85% 339 MB
|============================================== | 86% 340 MB
|============================================== | 86% 340 MB
|============================================== | 86% 340 MB
|============================================== | 86% 341 MB
|============================================== | 86% 341 MB
|============================================== | 86% 342 MB
|============================================== | 86% 342 MB
|============================================== | 86% 342 MB
|============================================== | 86% 343 MB
|============================================== | 86% 343 MB
|============================================== | 87% 343 MB
|=============================================== | 87% 344 MB
|=============================================== | 87% 344 MB
|=============================================== | 87% 345 MB
|=============================================== | 87% 345 MB
|=============================================== | 87% 345 MB
|=============================================== | 87% 346 MB
|=============================================== | 87% 346 MB
|=============================================== | 87% 346 MB
|=============================================== | 87% 347 MB
|=============================================== | 87% 347 MB
|=============================================== | 88% 348 MB
|=============================================== | 88% 348 MB
|=============================================== | 88% 348 MB
|=============================================== | 88% 349 MB
|=============================================== | 88% 349 MB
|=============================================== | 88% 349 MB
|=============================================== | 88% 350 MB
|=============================================== | 88% 350 MB
|=============================================== | 88% 351 MB
|================================================ | 88% 351 MB
|================================================ | 89% 351 MB
|================================================ | 89% 352 MB
|================================================ | 89% 352 MB
|================================================ | 89% 352 MB
|================================================ | 89% 353 MB
|================================================ | 89% 353 MB
|================================================ | 89% 353 MB
|================================================ | 89% 354 MB
|================================================ | 89% 354 MB
|================================================ | 89% 355 MB
|================================================ | 89% 355 MB
|================================================ | 90% 355 MB
|================================================ | 90% 356 MB
|================================================ | 90% 356 MB
|================================================ | 90% 356 MB
|================================================ | 90% 357 MB
|================================================ | 90% 357 MB
|================================================ | 90% 357 MB
|================================================ | 90% 358 MB
|================================================= | 90% 358 MB
|================================================= | 90% 359 MB
|================================================= | 90% 359 MB
|================================================= | 91% 359 MB
|================================================= | 91% 360 MB
|================================================= | 91% 360 MB
|================================================= | 91% 361 MB
|================================================= | 91% 361 MB
|================================================= | 91% 361 MB
|================================================= | 91% 362 MB
|================================================= | 91% 362 MB
|================================================= | 91% 363 MB
|================================================= | 91% 363 MB
|================================================= | 92% 363 MB
|================================================= | 92% 364 MB
|================================================= | 92% 364 MB
|================================================= | 92% 364 MB
|================================================= | 92% 365 MB
|================================================= | 92% 365 MB
|================================================== | 92% 366 MB
|================================================== | 92% 366 MB
|================================================== | 92% 366 MB
|================================================== | 92% 367 MB
|================================================== | 92% 367 MB
|================================================== | 93% 367 MB
|================================================== | 93% 368 MB
|================================================== | 93% 368 MB
|================================================== | 93% 368 MB
|================================================== | 93% 369 MB
|================================================== | 93% 369 MB
|================================================== | 93% 370 MB
|================================================== | 93% 370 MB
|================================================== | 93% 370 MB
|================================================== | 93% 371 MB
|================================================== | 93% 371 MB
|================================================== | 94% 371 MB
|================================================== | 94% 372 MB
|================================================== | 94% 372 MB
|================================================== | 94% 372 MB
|=================================================== | 94% 373 MB
|=================================================== | 94% 373 MB
|=================================================== | 94% 373 MB
|=================================================== | 94% 374 MB
|=================================================== | 94% 374 MB
|=================================================== | 94% 375 MB
|=================================================== | 94% 375 MB
|=================================================== | 95% 375 MB
|=================================================== | 95% 376 MB
|=================================================== | 95% 376 MB
|=================================================== | 95% 376 MB
|=================================================== | 95% 377 MB
|=================================================== | 95% 377 MB
|=================================================== | 95% 377 MB
|=================================================== | 95% 378 MB
|=================================================== | 95% 378 MB
|=================================================== | 95% 378 MB
|=================================================== | 95% 379 MB
|=================================================== | 96% 379 MB
|=================================================== | 96% 379 MB
|=================================================== | 96% 380 MB
|==================================================== | 96% 380 MB
|==================================================== | 96% 380 MB
|==================================================== | 96% 381 MB
|==================================================== | 96% 381 MB
|==================================================== | 96% 381 MB
|==================================================== | 96% 382 MB
|==================================================== | 96% 382 MB
|==================================================== | 96% 383 MB
|==================================================== | 97% 383 MB
|==================================================== | 97% 383 MB
|==================================================== | 97% 384 MB
|==================================================== | 97% 384 MB
|==================================================== | 97% 384 MB
|==================================================== | 97% 385 MB
|==================================================== | 97% 385 MB
|==================================================== | 97% 385 MB
|==================================================== | 97% 386 MB
|==================================================== | 97% 386 MB
|==================================================== | 97% 386 MB
|==================================================== | 97% 387 MB
|==================================================== | 98% 387 MB
|==================================================== | 98% 387 MB
|=====================================================| 98% 388 MB
|=====================================================| 98% 388 MB
|=====================================================| 98% 388 MB
|=====================================================| 98% 389 MB
|=====================================================| 98% 389 MB
|=====================================================| 98% 389 MB
|=====================================================| 98% 390 MB
|=====================================================| 98% 390 MB
|=====================================================| 98% 390 MB
|=====================================================| 98% 391 MB
|=====================================================| 99% 391 MB
|=====================================================| 99% 391 MB
|=====================================================| 99% 392 MB
|=====================================================| 99% 392 MB
|=====================================================| 99% 392 MB
|=====================================================| 99% 393 MB
|=====================================================| 99% 393 MB
|=====================================================| 99% 393 MB
|=====================================================| 99% 394 MB
|=====================================================| 99% 394 MB
|=====================================================| 99% 394 MB
|=====================================================| 99% 394 MB
|======================================================| 100% 395 MB
snps
filter to keep snps where there is data from all samples
# Keep only SNPs that have a call in every sample.
# Sample columns are the ones whose names START with a digit (e.g. 38LTR);
# the pattern is anchored so it agrees with the "^[0-9]" used further down
# when the sample columns are unpacked. "." entries were read in as NA, so
# complete.cases() is FALSE for any row with a missing sample call.
snps <- snps %>%
  filter(complete.cases(select(., matches("^[0-9]"))))
snps
# Pull the DP= value out of the INFO string into a numeric TOTAL_DEPTH column,
# then keep SNPs that
#   * have QUAL of at least 100,
#   * have a single-character ALT allele, and
#   * have a total depth strictly between the 5th and 95th percentiles.
# All conditions sit in ONE filter() call, so both quantiles are computed on
# the full, unfiltered TOTAL_DEPTH column.
snps <- snps %>%
  mutate(
    TOTAL_DEPTH = as.numeric(str_remove(str_extract(INFO, "DP=[0-9]*"), "DP="))
  ) %>%
  filter(
    QUAL >= 100,
    nchar(ALT) == 1,
    TOTAL_DEPTH > quantile(TOTAL_DEPTH, 0.05),
    TOTAL_DEPTH < quantile(TOTAL_DEPTH, 0.95)
  )
snps
unpack the information for the different samples:
# Sample columns are those whose names start with a digit.
samples <- str_subset(colnames(snps), "^[0-9]")

# Names for the eight ":"-separated sub-fields of each sample column.
# NOTE(review): presumably these follow the order given in the VCF FORMAT
# column -- confirm against the FORMAT values in the file.
gt_fields <- c(
  "gt", "tot.depth", "allele.depth", "ref.depth",
  "ref.qual", "alt.depth", "alt.qual", "gt.lik"
)

# Split each sample column into <sample>_<field> columns; convert = TRUE lets
# tidyr turn the numeric-looking pieces into numbers.
for (s in samples) {
  snps <- separate(
    snps,
    !!s,
    into = paste(s, gt_fields, sep = "_"),
    sep = ":",
    convert = TRUE
  )
}
snps
For the PCA we only need the genotype info
# Keep the SNP coordinates plus every per-sample genotype (*_gt) column.
gts <- select(snps, CHROM, POS, ends_with("_gt"))
gts
remove the 38LTR sample
# Drop the genotype column belonging to sample 38LTR.
gts <- select(gts, -`38LTR_gt`)
convert this to numeric
# Encode the genotype strings as integers for prcomp(), with samples in rows
# and SNPs in columns.
#
# The factor levels are computed ONCE over all samples. Calling factor() on
# each column separately derives the levels from that column alone, so a
# sample lacking one genotype class would get different integer codes for the
# same genotype than the other samples do. When every sample contains every
# genotype class, the shared levels give exactly the per-column codes.
gt_calls <- gts %>%
  select(-CHROM, -POS)
gt_levels <- sort(unique(unlist(gt_calls, use.names = FALSE)))

geno.numeric <- gt_calls %>%
  lapply(factor, levels = gt_levels) %>% # genotype string -> shared integer code
  as.data.frame() %>% # also prefixes digit-leading names with "X" (X42LTR_gt)
  data.matrix() %>% # factor codes -> numeric matrix
  t() # samples become rows, SNPs become columns

# Label each SNP column as <contig>_<position>.
colnames(geno.numeric) <- str_c(gts$CHROM, "_", gts$POS)
head(geno.numeric[, 1:5], 10)
GCZN01000007.1_2815 GCZN01000007.1_2834
X42LTR_gt 2 3
X42LTRR_gt 2 2
X43LTR_gt 2 2
X43LTRR_gt 2 2
X49LTWR_gt 2 2
X49LTWRR_gt 3 3
X95LTWR_gt 2 3
X95LTWRR_gt 2 3
X99LTWR_gt 2 2
GCZN01000007.1_2865 GCZN01000007.1_2881
X42LTR_gt 2 2
X42LTRR_gt 2 2
X43LTR_gt 2 2
X43LTRR_gt 2 2
X49LTWR_gt 2 2
X49LTWRR_gt 3 3
X95LTWR_gt 3 2
X95LTWRR_gt 3 2
X99LTWR_gt 2 2
GCZN01000007.1_2970
X42LTR_gt 2
X42LTRR_gt 1
X43LTR_gt 2
X43LTRR_gt 2
X49LTWR_gt 2
X49LTWRR_gt 1
X95LTWR_gt 2
X95LTWRR_gt 2
X99LTWR_gt 2
dim(geno.numeric)
[1] 9 134421
dim(gts)
[1] 134421 11
get the principal components
# Principal component analysis of the sample x SNP genotype matrix.
# center/scale. are spelled out at their prcomp() default values: columns are
# mean-centred but not rescaled to unit variance.
pca <- prcomp(x = geno.numeric, center = TRUE, scale. = FALSE)
summary(object = pca)
Importance of components:
PC1 PC2 PC3 PC4 PC5
Standard deviation 78.9711 75.4129 60.8894 54.9581 52.3780
Proportion of Variance 0.2291 0.2089 0.1362 0.1110 0.1008
Cumulative Proportion 0.2291 0.4380 0.5742 0.6852 0.7860
PC6 PC7 PC8 PC9
Standard deviation 45.30719 43.88667 42.97555 1.288e-11
Proportion of Variance 0.07541 0.07076 0.06785 0.000e+00
Cumulative Proportion 0.86139 0.93215 1.00000 1.000e+00
plot it
# Long-format PCA scores: one row per sample x component, with PC1 kept as its
# own column so every other component can be plotted against it.
# The components to stack are chosen by exclusion ("everything except sample,
# response and PC1") instead of the fixed range PC2:PC9, so this keeps working
# when the number of samples, and therefore of components, changes.
plot.data <- pca$x %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  mutate(response = str_extract(sample, "(LTR|LTWR)")) %>%
  gather(key = "component", value = "value", -sample, -response, -PC1)

# One scatter plot of PC1 against each remaining component.
# unique() keeps the column order produced by gather() (PC2, PC3, ...); sorting
# the names as text would put PC10 before PC2 once there are ten or more
# components.
plots <- map(unique(plot.data$component), function(x) {
  plot.data %>%
    filter(component == x) %>%
    ggplot(aes(x = PC1, y = value, label = sample, color = response)) +
    geom_point() +
    ylab(x)
})
plots
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
# PC1 loading of every SNP, largest absolute loading first.
# The rotation matrix has one row per SNP (row names are <contig>_<position>).
loadings <- pca$rotation %>%
  as.data.frame() %>%
  rownames_to_column("snp") %>%
  select(PC1, snp) %>%
  arrange(desc(abs(PC1)))
loadings
# Summarise the PC1 loadings per contig:
#   abs.contribution - absolute value of the summed loadings (sort key)
#   contribution     - signed sum of the loadings
#   number_of_snps   - how many SNPs the contig contributed
# The SNP label is split at "_" into contig and position first.
contributions <- loadings %>%
  separate(snp, into = c("contig", "pos"), sep = "_") %>%
  group_by(contig) %>%
  summarise(
    abs.contribution = abs(sum(PC1)),
    contribution = sum(PC1),
    number_of_snps = n()
  ) %>%
  arrange(desc(abs.contribution))
contributions
bring in seq lengths
# Read the reference sequence lengths. The file's own first line is skipped
# and the two columns are named contig/length here instead.
lengths <- read_csv(
  "../input/Pinsaporeference1_lengths.csv",
  col_names = c("contig", "length"),
  skip = 1
)
# Keep only the part of the contig name before the first space, so that it can
# be joined against the contig IDs used for the SNPs.
lengths <- mutate(lengths, contig = str_remove(contig, " .*"))
Parsed with column specification:
cols(
contig = [31mcol_character()[39m,
length = [32mcol_double()[39m
)
lengths
# Attach each contig's length and express SNP density as SNPs per 100 bp
# (rounded to two decimals). transmute() both adds the new column and fixes
# the final column set and order; abs.contribution is dropped here.
contributions <- left_join(contributions, lengths) %>%
  transmute(
    contig,
    contribution,
    length,
    number_of_snps,
    snps_per_100bp = round(number_of_snps / length * 100, 2)
  )
Joining, by = "contig"
# Show the per-contig contribution table, then save it to the output folder.
contributions
# NOTE(review): the target name ends in .gz; readr is documented to compress
# output automatically based on the extension -- confirm for the installed
# readr version.
write_csv(contributions, "../output/gene_contributions.csv.gz")
# PC1/PC2 scores per sample with tidied-up labels for plotting.
# The ID clean-up is done one step at a time, in this order:
#   1. first "W" -> "N"      (X49LTWR_gt  -> X49LTNR_gt)
#   2. first "RR" -> "R2"    (X42LTRR_gt  -> X42LTR2_gt)
#   3. strip "X" and "_gt"   (X42LTR2_gt  -> 42LTR2)
# Samples whose cleaned ID contains "N" are labelled "no recovery", all
# others "recovery".
pc1pc2 <- as.data.frame(pca$x) %>%
  rownames_to_column("ID") %>%
  select(ID, PC1, PC2) %>%
  mutate(ID = str_replace(ID, "W", "N")) %>%
  mutate(ID = str_replace(ID, "RR", "R2")) %>%
  mutate(ID = str_remove_all(ID, "(X|_gt)")) %>%
  mutate(response = ifelse(str_detect(ID, "N"), "no recovery", "recovery"))
pc1pc2
# PC1 vs PC2 scatter, coloured by response, with repelled sample labels that
# are only allowed to move vertically. The labels are kept out of the legend.
ggplot(pc1pc2, aes(x = PC1, y = PC2, label = ID, color = response)) +
  geom_point() +
  geom_text_repel(direction = "y", show.legend = FALSE)
# No plot is passed, so ggsave() writes the most recent ggplot.
ggsave(filename = "../output/PCA.pdf")
Saving 5.83 x 3.61 in image
Create a list of GCZN01054158.1 SNPs
# PC1 loadings for the SNPs on contig GCZN01054158.1, ordered by position.
# The SNP label is split into contig/position BEFORE filtering so the contig
# can be compared exactly. A substring test on the combined label would also
# keep SNPs from any contig whose name merely contains "GCZN01054158.1"
# (for example a hypothetical "GCZN01054158.10").
GCZN01054158.1.loadings <- loadings %>%
  separate(snp, into = c("contig", "position"), sep = "_", convert = TRUE) %>%
  filter(contig == "GCZN01054158.1") %>%
  arrange(position)
GCZN01054158.1.loadings
# Add the full per-SNP record from `snps` to each loading row, matching
# contig -> CHROM and position -> POS, then display and save the result.
GCZN01054158.snpinfo <- GCZN01054158.1.loadings %>%
  left_join(snps, by = c("contig" = "CHROM", "position" = "POS"))
GCZN01054158.snpinfo
write_csv(GCZN01054158.snpinfo, "../output/GCZN01054158.snpinfo.csv")